diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5071946aea0d1e45dffd10377148bd73273d7a00
Binary files /dev/null and b/.DS_Store differ
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000000000000000000000000000000000..5b627cfa60b5313da2c7df81aab2340225c2329f
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,4 @@
+## Code of Conduct
+This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
+For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
+opensource-codeofconduct@amazon.com with any additional questions or comments.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..c4b6a1c5081adcf78822222488e7c5b0f1dc6499
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,59 @@
+# Contributing Guidelines
+
+Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
+documentation, we greatly value feedback and contributions from our community.
+
+Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
+information to effectively respond to your bug report or contribution.
+
+
+## Reporting Bugs/Feature Requests
+
+We welcome you to use the GitHub issue tracker to report bugs or suggest features.
+
+When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
+reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
+
+* A reproducible test case or series of steps
+* The version of our code being used
+* Any modifications you've made relevant to the bug
+* Anything unusual about your environment or deployment
+
+
+## Contributing via Pull Requests
+Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
+
+1. You are working against the latest source on the *main* branch.
+2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
+3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
+
+To send us a pull request, please:
+
+1. Fork the repository.
+2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
+3. Ensure local tests pass.
+4. Commit to your fork using clear commit messages.
+5. Send us a pull request, answering any default questions in the pull request interface.
+6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
+
+GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
+[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
+
+
+## Finding contributions to work on
+Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
+
+
+## Code of Conduct
+This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
+For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
+opensource-codeofconduct@amazon.com with any additional questions or comments.
+
+
+## Security issue notifications
+If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
+
+
+## Licensing
+
+See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..a7767b63a8d61b2622642ccc9012f06af5053e17
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner.
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 1999-2022 Alibaba Group Holding Ltd. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000000000000000000000000000000000000..616fc5889451895dbf9768e6787c8308c33bef22 --- /dev/null +++ b/NOTICE @@ -0,0 +1 @@ +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. diff --git a/README.md b/README.md index 091dc260e6e33da12c8f56a77d2d4db25b04001c..e4b5d45a19b382740a684a270ad101213a0dba80 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,185 @@ ---- -title: PolyFormer -emoji: 🔥 -colorFrom: blue -colorTo: blue -sdk: gradio -sdk_version: 3.29.0 -app_file: app.py -pinned: false -license: apache-2.0 ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# PolyFormer: Referring Image Segmentation as Sequential Polygon Generation (CVPR 2023) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/polyformer-referring-image-segmentation-as/referring-expression-segmentation-on-refcocog)](https://paperswithcode.com/sota/referring-expression-segmentation-on-refcocog?p=polyformer-referring-image-segmentation-as) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/polyformer-referring-image-segmentation-as/referring-expression-segmentation-on-refcoco)](https://paperswithcode.com/sota/referring-expression-segmentation-on-refcoco?p=polyformer-referring-image-segmentation-as) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/polyformer-referring-image-segmentation-as/referring-expression-segmentation-on-refcoco-1)](https://paperswithcode.com/sota/referring-expression-segmentation-on-refcoco-1?p=polyformer-referring-image-segmentation-as) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/polyformer-referring-image-segmentation-as/referring-expression-comprehension-on-refcoco)](https://paperswithcode.com/sota/referring-expression-comprehension-on-refcoco?p=polyformer-referring-image-segmentation-as) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/polyformer-referring-image-segmentation-as/referring-expression-comprehension-on-refcoco-1)](https://paperswithcode.com/sota/referring-expression-comprehension-on-refcoco-1?p=polyformer-referring-image-segmentation-as) + + +\[[Project Page](https://polyformer.github.io/)\] \[[Paper](https://arxiv.org/abs/2302.07387)\] + +by 
[Jiang Liu*](https://joellliu.github.io/), [Hui Ding*](http://www.huiding.org/), [Zhaowei Cai](https://zhaoweicai.github.io/), [Yuting Zhang](https://scholar.google.com/citations?user=9UfZJskAAAAJ&hl=en), [Ravi Kumar Satzoda](https://scholar.google.com.sg/citations?user=4ngycwIAAAAJ&hl=en), [Vijay Mahadevan](https://scholar.google.com/citations?user=n9fRgvkAAAAJ&hl=en), [R. Manmatha](https://ciir.cs.umass.edu/~manmatha/).
+
+
+## :notes: Introduction
+![github_figure](pipeline.gif)
+PolyFormer is a unified model for referring image segmentation (predicted as a polygon vertex sequence) and referring expression comprehension (predicted as bounding box corner points). The predicted polygons are converted to segmentation masks at the end; a minimal sketch of that rasterization step follows the contribution list below.
+
+**Contributions:**
+
+* State-of-the-art results on referring image segmentation and referring expression comprehension on 6 datasets;
+* A unified framework for referring image segmentation (RIS) and referring expression comprehension (REC) by formulating them as a sequence-to-sequence (seq2seq) prediction problem;
+* A regression-based decoder for accurate coordinate prediction, which outputs continuous 2D coordinates directly, without quantization error.
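+
+For illustration, turning a predicted polygon vertex sequence into a binary mask is a single rasterization call. A minimal sketch, assuming OpenCV and NumPy (the vertices below are made-up values, not model output):
+
+```python
+import cv2
+import numpy as np
+
+# (x, y) polygon vertices in image coordinates -- dummy values for illustration
+polygon = np.array([[60, 40], [220, 50], [230, 180], [80, 200]], dtype=np.int32)
+
+mask = np.zeros((256, 256), dtype=np.uint8)  # H x W binary mask
+cv2.fillPoly(mask, [polygon], color=1)       # fill the closed polygon
+print(mask.sum())                            # number of foreground pixels
+```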
+
+
+## Getting Started
+### Installation
+```bash
+conda create -n polyformer python=3.7.4
+conda activate polyformer
+python -m pip install -r requirements.txt
+```
+Note: if you are getting import errors from `fairseq`, try the following:
+```bash
+python -m pip install pip==21.2.4
+pip uninstall fairseq
+pip install -r requirements.txt
+```
+
+## Datasets
+### Prepare Pretraining Data
+1. Create the dataset folders
+```bash
+mkdir datasets
+mkdir datasets/images
+mkdir datasets/annotations
+```
+2. Download the *2014 Train images [83K/13GB]* from [COCO](https://cocodataset.org/#download),
+original [Flickr30K images](http://shannon.cs.illinois.edu/DenotationGraph/),
+[ReferItGame images](https://drive.google.com/file/d/1R6Tm7tQTHCil6A_eOhjudK3rgaBxkD2t/view?usp=sharing),
+and [Visual Genome images](http://visualgenome.org/api/v0/api_home.html), and extract them to `datasets/images`.
+3. Download the annotation file for the pretraining datasets, [instances.json](https://drive.google.com/drive/folders/1O4hzL8_s3aUsnj_JZnM3CwANd7TejcJO),
+provided by [SeqTR](https://github.com/sean-zhuh/SeqTR), and store it in `datasets/annotations`.
+The workspace directory should be organized like this:
+```
+PolyFormer/
+├── datasets/
+│   ├── images
+│   │   ├── flickr30k/*.jpg
+│   │   ├── mscoco/
+│   │   │   └── train2014/*.jpg
+│   │   ├── saiaprtc12/*.jpg
+│   │   └── visual-genome/*.jpg
+│   └── annotations
+│       └── instances.json
+└── ...
+```
+4. Generate the tsv files for pretraining
+```bash
+python data/create_pretraining_data.py
+```
+### Prepare Finetuning Data
+1. Follow the instructions in the `./refer` directory to set up subdirectories
+and download annotations.
+This directory is based on the [refer](https://github.com/lichengunc/refer) API.
+
+2. Generate the tsv files for finetuning
+```bash
+python data/create_finetuning_data.py
+```
+
+## Pretraining
+1. Create the checkpoints folder
+```bash
+mkdir weights
+```
+2. Download the pretrained weights of [Swin-base](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth),
+[Swin-large](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth),
+and [BERT-base](https://cdn.huggingface.co/bert-base-uncased-pytorch_model.bin),
+and put the weight files in `./pretrained_weights`. These weights are needed to initialize the model for training.
+
+3. Run the pretraining scripts for model pretraining on the referring expression comprehension task:
+```bash
+cd run_scripts/pretrain
+bash pretrain_polyformer_b.sh  # for pretraining the PolyFormer-B model
+bash pretrain_polyformer_l.sh  # for pretraining the PolyFormer-L model
+```
+
+## Finetuning
+Run the finetuning scripts to finetune the model on the referring image segmentation and referring expression comprehension tasks:
+```bash
+cd run_scripts/finetune
+bash train_polyformer_b.sh  # for finetuning the PolyFormer-B model
+bash train_polyformer_l.sh  # for finetuning the PolyFormer-L model
+```
+Please make sure to point the pretrained weight paths (Line 20) in the finetuning scripts to the best pretraining checkpoints.
+
+## Evaluation
+Run the evaluation scripts to evaluate on the referring image segmentation and referring expression comprehension tasks:
+```bash
+cd run_scripts/evaluation
+
+# for evaluating the PolyFormer-B model
+bash evaluate_polyformer_b_refcoco.sh
+bash evaluate_polyformer_b_refcoco+.sh
+bash evaluate_polyformer_b_refcocog.sh
+
+# for evaluating the PolyFormer-L model
+bash evaluate_polyformer_l_refcoco.sh
+bash evaluate_polyformer_l_refcoco+.sh
+bash evaluate_polyformer_l_refcocog.sh
+```
+
+## Model Zoo
+Download the model weights to `./weights` if you want to use our trained models for finetuning and evaluation.
+
+| | Refcoco val | | | Refcoco testA | | | Refcoco testB | | |
+|---|---|---|---|---|---|---|---|---|---|
+| Model | oIoU | mIoU | Prec@0.5 | oIoU | mIoU | Prec@0.5 | oIoU | mIoU | Prec@0.5 |
+| [PolyFormer-B](https://drive.google.com/file/d/1K0y-WBO6cL7gBzNnJaHAeNu3pgq4DbJ9/view?usp=share_link) | 74.82 | 75.96 | 89.73 | 76.64 | 77.09 | 91.73 | 71.06 | 73.22 | 86.03 |
+| [PolyFormer-L](https://drive.google.com/file/d/15P6m5RI6HAQE2QXQXMAjw_oBsaPii7b3/view?usp=share_link) | 75.96 | 76.94 | 90.38 | 78.29 | 78.49 | 92.89 | 73.25 | 74.83 | 87.16 |
+
+| | Refcoco+ val | | | Refcoco+ testA | | | Refcoco+ testB | | |
+|---|---|---|---|---|---|---|---|---|---|
+| Model | oIoU | mIoU | Prec@0.5 | oIoU | mIoU | Prec@0.5 | oIoU | mIoU | Prec@0.5 |
+| [PolyFormer-B](https://drive.google.com/file/d/12_ylFhsbqGySxDqgeEByn8nKoJtT2n2w/view?usp=share_link) | 67.64 | 70.65 | 83.73 | 72.89 | 74.51 | 88.60 | 59.33 | 64.64 | 76.38 |
+| [PolyFormer-L](https://drive.google.com/file/d/1lUCv7dUPctEz4vEpPr7aI8A8ZmfYCB8y/view?usp=share_link) | 69.33 | 72.15 | 84.98 | 74.56 | 75.71 | 89.77 | 61.87 | 66.73 | 77.97 |
+
+| | Refcocog val | | | Refcocog test | | |
+|---|---|---|---|---|---|---|
+| Model | oIoU | mIoU | Prec@0.5 | oIoU | mIoU | Prec@0.5 |
+| [PolyFormer-B](https://drive.google.com/file/d/12_ylFhsbqGySxDqgeEByn8nKoJtT2n2w/view?usp=share_link) | 67.76 | 69.36 | 84.46 | 69.05 | 69.88 | 84.96 |
+| [PolyFormer-L](https://drive.google.com/file/d/1lUCv7dUPctEz4vEpPr7aI8A8ZmfYCB8y/view?usp=share_link) | 69.20 | 71.15 | 85.83 | 70.19 | 71.17 | 85.91 |
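+
+For reference, the three metrics reported above can be computed from binary masks as in the following sketch (an illustration of the standard definitions, not the repo's evaluation code; `preds` and `gts` are assumed to be lists of H x W boolean NumPy arrays):
+
+```python
+import numpy as np
+
+def iou(pred, gt):
+    """IoU between two binary masks."""
+    union = np.logical_or(pred, gt).sum()
+    return np.logical_and(pred, gt).sum() / union if union else 0.0
+
+def summarize(preds, gts):
+    ious = np.array([iou(p, g) for p, g in zip(preds, gts)])
+    miou = ious.mean()              # mIoU: mean IoU over samples
+    inter = sum(np.logical_and(p, g).sum() for p, g in zip(preds, gts))
+    union = sum(np.logical_or(p, g).sum() for p, g in zip(preds, gts))
+    oiou = inter / union            # oIoU: intersection/union pooled over the dataset
+    prec50 = (ious >= 0.5).mean()   # Prec@0.5: fraction of samples with IoU >= 0.5
+    return oiou, miou, prec50
+```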
+* Pretrained weights:
+  * [PolyFormer-B](https://drive.google.com/file/d/1sAzfChYDdHdaeatB2K14lrJjG4uiXAol/view?usp=share_link)
+  * [PolyFormer-L](https://drive.google.com/file/d/1knRxgM1lmEkuZZ-cOm_fmwKP1H0bJGU9/view?usp=share_link)
+
+# Acknowledgement
+This codebase is developed based on [OFA](https://github.com/OFA-Sys/OFA).
+Other related codebases include:
+* [Fairseq](https://github.com/pytorch/fairseq)
+* [refer](https://github.com/lichengunc/refer)
+* [LAVT-RIS](https://github.com/yz93/LAVT-RIS/)
+* [SeqTR](https://github.com/sean-zhuh/SeqTR)
+
+# Citation
+Please cite our paper if you find this codebase helpful :)
+
+```
+@inproceedings{liu2023polyformer,
+  title={PolyFormer: Referring Image Segmentation as Sequential Polygon Generation},
+  author={Liu, Jiang and Ding, Hui and Cai, Zhaowei and Zhang, Yuting and Satzoda, Ravi Kumar and Mahadevan, Vijay and Manmatha, R},
+  booktitle={CVPR},
+  year={2023}
+}
+```
+
+## Security
+
+See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
+
+## License
+
+This project is licensed under the Apache-2.0 License.
+
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1bcc5f66ad394a4546b06753b64886f1885a3ee
--- /dev/null
+++ b/app.py
@@ -0,0 +1,47 @@
+import os
+import math
+import re
+import base64
+from io import BytesIO
+
+import cv2
+import numpy as np
+import torch
+import gradio as gr
+from PIL import Image
+from torchvision import transforms
+
+from fairseq import checkpoint_utils, tasks, utils
+from utils.checkpoint_utils import load_model_ensemble_and_task
+from utils.eval_utils import eval_step
+from tasks.refcoco import RefcocoTask  # imported for its task-registration side effect
+from models.polyformer import PolyFormerModel
+from demo import visual_grounding
+
+title = "PolyFormer-Visual_Grounding"
+description = "Gradio Demo for PolyFormer-Visual_Grounding. Upload your own image or click any one of the examples, " \
+              "and write a description about a certain object. " \
+              "Then click \"Submit\" and wait for the grounding result. For help or to provide feedback, please contact: Hui Ding (@huidin)"
+article = ""
+# examples = [['A bear astronaut in the space.jpeg', 'a bear astronaut in the space'],
+#             ['A unicorn doing computer vision research.jpeg', 'a unicorn doing computer vision research'],
+#             ['pig.jpeg', 'a pig robot preparing a delicious meal'],
+#             ['otta.png', 'a gentleman otter in a 19th century portrait'],
+#             ['pikachu.jpeg', 'a pikachu fine-dining with a view to the Eiffel Tower'],
+#             ['A small cabin on top of a snowy mountain in the style of Disney artstation.jpeg', 'a small cabin on top of a snowy mountain in the style of Disney artstation'],
+#            ]
+examples = []
+io = gr.Interface(fn=visual_grounding, inputs=[gr.inputs.Image(type='pil'), "textbox"],
+                  outputs=[gr.outputs.Image(label="output", type='numpy'), gr.outputs.Image(label="predicted mask", type='numpy')],
+                  title=title, description=description, article=article, examples=examples,
+                  allow_flagging=False, allow_screenshot=False)
+# io.launch(cache_examples=True)
+io.launch(share=True)
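+
+# For reference, the underlying entry point can also be called without Gradio.
+# A minimal sketch -- the (image, expression) -> (visualization, mask) signature
+# is assumed from the Interface inputs/outputs above:
+#
+#   img = Image.open("example.jpg")
+#   output_img, pred_mask = visual_grounding(img, "the dog on the left")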
" +# examples = [['A bear astronaut in the space.jpeg', 'a bear astronaut in the space'], +# ['A unicorn doing computer vision research.jpeg', 'a unicorn doing computer vision research'], +# ['pig.jpeg', 'a pig robot preparing a delicious meal'], +# ['otta.png', 'a gentleman otter in a 19th century portrait'], +# ['pikachu.jpeg', 'a pikachu fine-dining with a view to the Eiffel Tower'], +# ['A small cabin on top of a snowy mountain in the style of Disney artstation.jpeg', 'a small cabin on top of a snowy mountain in the style of Disney artstation'], +# +# ] +examples = [] +io = gr.Interface(fn=visual_grounding, inputs=[gr.inputs.Image(type='pil'), "textbox"], + outputs=[gr.outputs.Image(label="output", type='numpy'), gr.outputs.Image(label="predicted mask", type='numpy')], + title=title, description=description, article=article, examples=examples, + allow_flagging=False, allow_screenshot=False) +# io.launch(cache_examples=True) +io.launch(share=True) + diff --git a/bert/activations.py b/bert/activations.py new file mode 100644 index 0000000000000000000000000000000000000000..8a1206ee285ce3f0484d129711a2d684700a20a1 --- /dev/null +++ b/bert/activations.py @@ -0,0 +1,56 @@ +import logging +import math + +import torch +import torch.nn.functional as F + + +logger = logging.getLogger(__name__) + + +def swish(x): + return x * torch.sigmoid(x) + + +def _gelu_python(x): + """ Original Implementation of the gelu activation function in Google Bert repo when initially created. + For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + This is now written in C in torch.nn.functional + Also see https://arxiv.org/abs/1606.08415 + """ + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) + + +def gelu_new(x): + """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT). + Also see https://arxiv.org/abs/1606.08415 + """ + return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0)))) + + +if torch.__version__ < "1.4.0": + gelu = _gelu_python +else: + gelu = F.gelu + + +def gelu_fast(x): + return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x))) + + +ACT2FN = { + "relu": F.relu, + "swish": swish, + "gelu": gelu, + "tanh": torch.tanh, + "gelu_new": gelu_new, + "gelu_fast": gelu_fast, +} + + +def get_activation(activation_string): + if activation_string in ACT2FN: + return ACT2FN[activation_string] + else: + raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys()))) diff --git a/bert/configuration_bert.py b/bert/configuration_bert.py new file mode 100644 index 0000000000000000000000000000000000000000..8e815837bc4dbc5fc8eec7ee37547b5d41519af5 --- /dev/null +++ b/bert/configuration_bert.py @@ -0,0 +1,143 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" BERT model configuration """ + + +import logging + +from .configuration_utils import PretrainedConfig + + +logger = logging.getLogger(__name__) + +BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { + "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json", + "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-config.json", + "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json", + "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-config.json", + "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-config.json", + "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-config.json", + "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-config.json", + "bert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json", + "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json", + "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json", + "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json", + "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-config.json", + "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json", + "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-config.json", + "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-config.json", + "cl-tohoku/bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese/config.json", + "cl-tohoku/bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking/config.json", + "cl-tohoku/bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char/config.json", + "cl-tohoku/bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking/config.json", + "TurkuNLP/bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json", + "TurkuNLP/bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json", + "wietsedv/bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/config.json", + # See all BERT models at https://huggingface.co/models?filter=bert +} + + +class BertConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a :class:`~transformers.BertModel`. 
It is used to instantiate a BERT model according to the specified arguments, defining the model
+    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
+    the BERT `bert-base-uncased` architecture.

+* **Convolutional Neural Networks (CNN)**
+  + [Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017)](examples/language_model/conv_lm/README.md)
+  + [Convolutional Sequence to Sequence Learning (Gehring et al., 2017)](examples/conv_seq2seq/README.md)
+  + [Classical Structured Prediction Losses for Sequence to Sequence Learning (Edunov et al., 2018)](https://github.com/pytorch/fairseq/tree/classic_seqlevel)
+  + [Hierarchical Neural Story Generation (Fan et al., 2018)](examples/stories/README.md)
+  + [wav2vec: Unsupervised Pre-training for Speech Recognition (Schneider et al., 2019)](examples/wav2vec/README.md)
+* **LightConv and DynamicConv models**
+  + [Pay Less Attention with Lightweight and Dynamic Convolutions (Wu et al., 2019)](examples/pay_less_attention_paper/README.md)
+* **Long Short-Term Memory (LSTM) networks**
+  + Effective Approaches to Attention-based Neural Machine Translation (Luong et al., 2015)
+* **Transformer (self-attention) networks**
+  + Attention Is All You Need (Vaswani et al., 2017)
+  + [Scaling Neural Machine Translation (Ott et al., 2018)](examples/scaling_nmt/README.md)
+  + [Understanding Back-Translation at Scale (Edunov et al., 2018)](examples/backtranslation/README.md)
+  + [Adaptive Input Representations for Neural Language Modeling (Baevski and Auli, 2018)](examples/language_model/README.adaptive_inputs.md)
+  + [Lexically constrained decoding with dynamic beam allocation (Post & Vilar, 2018)](examples/constrained_decoding/README.md)
+  + [Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context (Dai et al., 2019)](examples/truncated_bptt/README.md)
+  + [Adaptive Attention Span in Transformers (Sukhbaatar et al., 2019)](examples/adaptive_span/README.md)
+  + [Mixture Models for Diverse Machine Translation: Tricks of the Trade (Shen et al., 2019)](examples/translation_moe/README.md)
+  + [RoBERTa: A Robustly Optimized BERT Pretraining Approach (Liu et al., 2019)](examples/roberta/README.md)
+  + [Facebook FAIR's WMT19 News Translation Task Submission (Ng et al., 2019)](examples/wmt19/README.md)
+  + [Jointly Learning to Align and Translate with Transformer Models (Garg et al., 2019)](examples/joint_alignment_translation/README.md)
+  + [Multilingual Denoising Pre-training for Neural Machine Translation (Liu et al., 2020)](examples/mbart/README.md)
+  + [Neural Machine Translation with Byte-Level Subwords (Wang et al., 2020)](examples/byte_level_bpe/README.md)
+  + [Unsupervised Quality Estimation for Neural Machine Translation (Fomicheva et al., 2020)](examples/unsupervised_quality_estimation/README.md)
+  + [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations (Baevski et al., 2020)](examples/wav2vec/README.md)
+  + [Generating Medical Reports from Patient-Doctor Conversations Using Sequence-to-Sequence Models (Enarvi et al., 2020)](examples/pointer_generator/README.md)
+  + [Linformer: Self-Attention with Linear Complexity (Wang et al., 2020)](examples/linformer/README.md)
+  + [Cross-lingual Retrieval for Iterative Self-Supervised Training (Tran et al., 2020)](examples/criss/README.md)
+  + [Deep Transformers with Latent Depth (Li et al., 2020)](examples/latent_depth/README.md)
+  + [Unsupervised Cross-lingual Representation Learning for Speech Recognition (Conneau et
al., 2020)](https://arxiv.org/abs/2006.13979) + + [Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training (Hsu, et al., 2021)](https://arxiv.org/abs/2104.01027) + + [Unsupervised Speech Recognition (Baevski, et al., 2021)](https://arxiv.org/abs/2105.11084) +* **Non-autoregressive Transformers** + + Non-Autoregressive Neural Machine Translation (Gu et al., 2017) + + Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement (Lee et al. 2018) + + Insertion Transformer: Flexible Sequence Generation via Insertion Operations (Stern et al. 2019) + + Mask-Predict: Parallel Decoding of Conditional Masked Language Models (Ghazvininejad et al., 2019) + + [Levenshtein Transformer (Gu et al., 2019)](examples/nonautoregressive_translation/README.md) +* **Finetuning** + + [Better Fine-Tuning by Reducing Representational Collapse (Aghajanyan et al. 2020)](examples/rxf/README.md) + +
+ +* September 2020: [Added Linformer code](examples/linformer/README.md) +* September 2020: [Added pointer-generator networks](examples/pointer_generator/README.md) +* August 2020: [Added lexically constrained decoding](examples/constrained_decoding/README.md) +* August 2020: [wav2vec2 models and code released](examples/wav2vec/README.md) +* July 2020: [Unsupervised Quality Estimation code released](examples/unsupervised_quality_estimation/README.md) +* May 2020: [Follow fairseq on Twitter](https://twitter.com/fairseq) +* April 2020: [Monotonic Multihead Attention code released](examples/simultaneous_translation/README.md) +* April 2020: [Quant-Noise code released](examples/quant_noise/README.md) +* April 2020: [Initial model parallel support and 11B parameters unidirectional LM released](examples/megatron_11b/README.md) +* March 2020: [Byte-level BPE code released](examples/byte_level_bpe/README.md) +* February 2020: [mBART model and code released](examples/mbart/README.md) +* February 2020: [Added tutorial for back-translation](https://github.com/pytorch/fairseq/tree/main/examples/backtranslation#training-your-own-model-wmt18-english-german) +* December 2019: [fairseq 0.9.0 released](https://github.com/pytorch/fairseq/releases/tag/v0.9.0) +* November 2019: [VizSeq released (a visual analysis toolkit for evaluating fairseq models)](https://facebookresearch.github.io/vizseq/docs/getting_started/fairseq_example) +* November 2019: [CamemBERT model and code released](examples/camembert/README.md) +* November 2019: [BART model and code released](examples/bart/README.md) +* November 2019: [XLM-R models and code released](examples/xlmr/README.md) +* September 2019: [Nonautoregressive translation code released](examples/nonautoregressive_translation/README.md) +* August 2019: [WMT'19 models released](examples/wmt19/README.md) +* July 2019: fairseq relicensed under MIT license +* July 2019: [RoBERTa models and code released](examples/roberta/README.md) +* June 2019: [wav2vec models and code released](examples/wav2vec/README.md) + +
+ +
+
+# Flores101: Large-Scale Multilingual Machine Translation
+
+## Introduction
+
+Baseline pretrained models for the small and large tracks of the WMT 21 Large-Scale Multilingual Machine Translation competition.
+
+Flores Task at WMT 21: http://www.statmt.org/wmt21/large-scale-multilingual-translation-task.html
+
+Flores announcement blog post: https://ai.facebook.com/blog/flores-researchers-kick-off-multilingual-translation-challenge-at-wmt-and-call-for-compute-grants/
+
+
+## Pretrained models
+
+Model | Num layers | Embed dimension | FFN dimension | Vocab size | #params | Download
+---|---|---|---|---|---|---
+`flores101_mm100_615M` | 12 | 1024 | 4096 | 256,000 | 615M | https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_615M.tar.gz
+`flores101_mm100_175M` | 6 | 512 | 2048 | 256,000 | 175M | https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_175M.tar.gz
+
+
+These models are trained similarly to [M2M-100](https://arxiv.org/abs/2010.11125), with additional support for the languages that are part of the WMT Large-Scale Multilingual Machine Translation track. The full list of languages can be found at the bottom.
+
+
+## Example generation code
+
+### Download the model and SentencePiece vocab
+
+```bash
+fairseq=/path/to/fairseq
+cd $fairseq
+
+# Download 615M param model.
+wget https://dl.fbaipublicfiles.com/flores101/pretrained_models/flores101_mm100_615M.tar.gz
+
+# Extract
+tar -xvzf flores101_mm100_615M.tar.gz
+```
+
+### Encode using our SentencePiece model
+Note: install SentencePiece from [here](https://github.com/google/sentencepiece)
+
+
+```bash
+fairseq=/path/to/fairseq
+cd $fairseq
+
+# Download example dataset from German to French
+sacrebleu --echo src -l de-fr -t wmt19 | head -n 20 > raw_input.de-fr.de
+sacrebleu --echo ref -l de-fr -t wmt19 | head -n 20 > raw_input.de-fr.fr
+
+for lang in de fr ; do
+    python scripts/spm_encode.py \
+        --model flores101_mm100_615M/sentencepiece.bpe.model \
+        --output_format=piece \
+        --inputs=raw_input.de-fr.${lang} \
+        --outputs=spm.de-fr.${lang}
+done
+```
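+
+The same encoding can also be done from Python via the `sentencepiece` package; a minimal sketch (the German sample sentence is made up):
+
+```python
+import sentencepiece as spm
+
+# Load the SentencePiece model shipped inside the downloaded tarball.
+sp = spm.SentencePieceProcessor(model_file="flores101_mm100_615M/sentencepiece.bpe.model")
+
+pieces = sp.encode("Maschinelles Lernen ist faszinierend.", out_type=str)
+print(" ".join(pieces))  # space-separated pieces, matching spm_encode.py's output
+```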
+
+### Binarization
+
+```bash
+fairseq-preprocess \
+    --source-lang de --target-lang fr \
+    --testpref spm.de-fr \
+    --thresholdsrc 0 --thresholdtgt 0 \
+    --destdir data_bin \
+    --srcdict flores101_mm100_615M/dict.txt --tgtdict flores101_mm100_615M/dict.txt
+```
+
+### Generation
+
+
+```bash
+fairseq-generate \
+    data_bin \
+    --batch-size 1 \
+    --path flores101_mm100_615M/model.pt \
+    --fixed-dictionary flores101_mm100_615M/dict.txt \
+    -s de -t fr \
+    --remove-bpe 'sentencepiece' \
+    --beam 5 \
+    --task translation_multi_simple_epoch \
+    --lang-pairs flores101_mm100_615M/language_pairs.txt \
+    --decoder-langtok --encoder-langtok src \
+    --gen-subset test \
+    --fp16 \
+    --dataset-impl mmap \
+    --distributed-world-size 1 --distributed-no-spawn
+```
+
+### Supported Languages and lang code
+
+Language | lang code
+---|---
+Afrikaans | af
+Amharic | am
+Arabic | ar
+Assamese | as
+Asturian | ast
+Aymara | ay
+Azerbaijani | az
+Bashkir | ba
+Belarusian | be
+Bulgarian | bg
+Bengali | bn
+Breton | br
+Bosnian | bs
+Catalan | ca
+Cebuano | ceb
+Chokwe | cjk
+Czech | cs
+Welsh | cy
+Danish | da
+German | de
+Dyula | dyu
+Greek | el
+English | en
+Spanish | es
+Estonian | et
+Persian | fa
+Fulah | ff
+Finnish | fi
+French | fr
+Western Frisian | fy
+Irish | ga
+Scottish Gaelic | gd
+Galician | gl
+Gujarati | gu
+Hausa | ha
+Hebrew | he
+Hindi | hi
+Croatian | hr
+Haitian Creole | ht
+Hungarian | hu
+Armenian | hy
+Indonesian | id
+Igbo | ig
+Iloko | ilo
+Icelandic | is
+Italian | it
+Japanese | ja
+Javanese | jv
+Georgian | ka
+Kachin | kac
+Kamba | kam
+Kabuverdianu | kea
+Kongo | kg
+Kazakh | kk
+Central Khmer | km
+Kimbundu | kmb
+Northern Kurdish | kmr
+Kannada | kn
+Korean | ko
+Kurdish | ku
+Kyrgyz | ky
+Luxembourgish | lb
+Ganda | lg
+Lingala | ln
+Lao | lo
+Lithuanian | lt
+Luo | luo
+Latvian | lv
+Malagasy | mg
+Maori | mi
+Macedonian | mk
+Malayalam | ml
+Mongolian | mn
+Marathi | mr
+Malay | ms
+Maltese | mt
+Burmese | my
+Nepali | ne
+Dutch | nl
+Norwegian | no
+Northern Sotho | ns
+Nyanja | ny
+Occitan | oc
+Oromo | om
+Oriya | or
+Punjabi | pa
+Polish | pl
+Pashto | ps
+Portuguese | pt
+Quechua | qu
+Romanian | ro
+Russian | ru
+Sindhi | sd
+Shan | shn
+Sinhala | si
+Slovak | sk
+Slovenian | sl
+Shona | sn
+Somali | so
+Albanian | sq
+Serbian | sr
+Swati | ss
+Sundanese | su
+Swedish | sv
+Swahili | sw
+Tamil | ta
+Telugu | te
+Tajik | tg
+Thai | th
+Tigrinya | ti
+Tagalog | tl
+Tswana | tn
+Turkish | tr
+Ukrainian | uk
+Umbundu | umb
+Urdu | ur
+Uzbek | uz
+Vietnamese | vi
+Wolof | wo
+Xhosa | xh
+Yiddish | yi
+Yoruba | yo
+Chinese | zh
+Zulu | zu
diff --git a/fairseq/examples/flores101/flores_logo.png b/fairseq/examples/flores101/flores_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..d4d1455c6eab608ff5317ce885183cd213564273
Binary files /dev/null and b/fairseq/examples/flores101/flores_logo.png differ
diff --git a/fairseq/examples/fully_sharded_data_parallel/README.md b/fairseq/examples/fully_sharded_data_parallel/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b9e44fef48bee5faeee27b3d1d1b1eb96b6a477f
--- /dev/null
+++ b/fairseq/examples/fully_sharded_data_parallel/README.md
@@ -0,0 +1,177 @@
+# Fully Sharded Data Parallel (FSDP)
+
+## Overview
+Recent work by [Microsoft](https://arxiv.org/abs/1910.02054) and
+[Google](https://arxiv.org/abs/2004.13336) has shown that data parallel
+training can be made significantly more efficient by sharding the model
+parameters and optimizer state across data parallel workers. These ideas are
+encapsulated in the new **`FullyShardedDataParallel` (FSDP)** wrapper provided
+by [fairscale](https://github.com/facebookresearch/fairscale/).
+
+Compared to PyTorch DDP:
+* FSDP produces identical results to PyTorch DDP (it's still synchronous data parallel training)
+* FSDP shards parameters (FP16 + FP32) and optimizer state across data parallel GPUs
+* FSDP is faster than PyTorch DDP because the optimizer step is sharded, and the communication can be overlapped with the forward pass
+* FSDP enables training 13B parameter models on 8 GPUs and 175B parameter models on 128 GPUs
+
+FSDP is fully supported in fairseq via the following new arguments:
+* `--ddp-backend=fully_sharded`: enables full sharding via FSDP
+* `--cpu-offload`: offloads the optimizer state and FP32 model copy to CPU (combine with `--optimizer=cpu_adam`)
+* `--no-reshard-after-forward`: increases training speed for large models (1B+ params) and is similar to ZeRO stage 2
+* other popular options (`--fp16`, `--update-freq`, `--checkpoint-activations`, `--offload-activations`, etc.) continue to work as normal
+
+FSDP currently has several limitations compared to fairseq's default DDP backend (PyTorch DDP):
+* while FSDP is fully compatible with pointwise Optimizers (e.g., Adam, AdamW, Adadelta, Adamax, SGD, etc.), it is not currently compatible with non-pointwise Optimizers (e.g., Adagrad, Adafactor, LAMB, etc.)
+* FSDP depends on flattening the parameters, so models that currently require `--fp16-no-flatten-grads` may not be supported + +See the [fairscale docs](https://fairscale.readthedocs.io/en/latest/api/nn/fsdp_tips.html) for a more detailed +explanation of these and other limitations. + +
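+Conceptually, the sharding behind `--ddp-backend=fully_sharded` is ZeRO-style: each rank keeps only a 1/world_size slice of the flattened parameters and optimizer state, and the full parameters are rebuilt by all-gather only around the forward/backward passes. A toy, single-process illustration of the memory arithmetic (not fairscale's actual implementation):
+
+```python
+import torch
+
+world_size = 8
+params = torch.randn(2 ** 20)              # flattened model parameters
+shards = list(params.chunk(world_size))    # each rank stores one shard
+
+# Per-rank Adam state (exp_avg + exp_avg_sq) scales with the shard size:
+full_state = 2 * params.numel()
+sharded_state = 2 * shards[0].numel()
+print(full_state // sharded_state)         # -> 8, i.e. 8x less state per rank
+
+# Around forward/backward, the full parameters are rebuilt (the all-gather):
+rebuilt = torch.cat(shards)
+assert torch.equal(rebuilt, params)
+```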
+ + + +See the [fairscale docs](https://fairscale.readthedocs.io/en/latest/api/nn/fsdp_tips.html) for a more detailed +explanation of how FSDP works. + +
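+The example output below comes from a 13B-parameter language-model run on a single GPU; the `Adam Optimizer #0 ... AVX2` banner and the loss-scale messages in the log indicate `--optimizer=cpu_adam` (as recommended with `--cpu-offload`) and `--fp16`: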
+ +``` +(...) +2021-03-08 12:29:51 | INFO | fairseq_cli.train | num. model params: 13,110,865,920 (num. trained: 13,110,865,920) +(...) +2021-03-08 12:29:51 | INFO | fairseq_cli.train | training on 1 devices (GPUs/TPUs) +2021-03-08 12:29:51 | INFO | fairseq_cli.train | max tokens per GPU = None and batch size per GPU = 8 +(...) +Adam Optimizer #0 is created with AVX2 arithmetic capability. +Config: alpha=0.000100, betas=(0.900000, 0.980000), weight_decay=0.000000, adam_w=1 +(...) +2021-03-08 12:31:36 | INFO | train_inner | {"epoch": 1, "update": 0.0, "loss": "16.475", "ppl": "91120.8", "wps": "0", "ups": "0", "wpb": "16384", "bsz": "8", "num_updates": "1", "lr": "2e-05", "gnorm": "20.751", "loss_scale": "4", "train_wall": "99", "gb_free": "9.3", "wall": "105"} +2021-03-08 12:32:33 | INFO | train_inner | {"epoch": 1, "update": 0.0, "loss": "16.446", "ppl": "89281.6", "wps": "288.7", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "2", "lr": "4e-05", "gnorm": "19.777", "loss_scale": "4", "train_wall": "57", "gb_free": "9.3", "wall": "161"} +2021-03-08 12:33:12 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +2021-03-08 12:33:51 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +2021-03-08 12:34:45 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "25.22", "ppl": "3.90691e+07", "wps": "123.4", "ups": "0.01", "wpb": "16384", "bsz": "8", "num_updates": "3", "lr": "6e-05", "gnorm": "131.281", "loss_scale": "1", "train_wall": "133", "gb_free": "9.3", "wall": "294"} +2021-03-08 12:35:43 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "18.079", "ppl": "276809", "wps": "285.5", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "4", "lr": "8e-05", "gnorm": "13.776", "loss_scale": "1", "train_wall": "57", "gb_free": "9.3", "wall": "351"} +2021-03-08 12:36:35 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "23.729", "ppl": "1.39088e+07", "wps": "316.7", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "5", "lr": "0.0001", "gnorm": "72.774", "loss_scale": "1", "train_wall": "52", "gb_free": "9.3", "wall": "403"} +2021-03-08 12:37:28 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "20.429", "ppl": "1.41203e+06", "wps": "307.6", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "6", "lr": "8e-05", "gnorm": "60.846", "loss_scale": "1", "train_wall": "53", "gb_free": "9.3", "wall": "456"} +2021-03-08 12:38:27 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "18.965", "ppl": "511684", "wps": "279.4", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "7", "lr": "6e-05", "gnorm": "22.687", "loss_scale": "1", "train_wall": "59", "gb_free": "9.3", "wall": "515"} +2021-03-08 12:39:18 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "18.345", "ppl": "332887", "wps": "319.1", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "8", "lr": "4e-05", "gnorm": "8.451", "loss_scale": "1", "train_wall": "51", "gb_free": "9.3", "wall": "566"} +2021-03-08 12:40:11 | INFO | train_inner | {"epoch": 1, "update": 0.002, "loss": "18.262", "ppl": "314336", "wps": "305.9", "ups": "0.02", "wpb": "16384", "bsz": "8", "num_updates": "9", "lr": "2e-05", "gnorm": "6.457", "loss_scale": "1", "train_wall": "54", "gb_free": "9.3", "wall": "620"} +2021-03-08 12:41:04 | INFO | train_inner | {"epoch": 1, "update": 0.002, "loss": "17.556", "ppl": "192686", "wps": "311.8", "ups": "0.02", "wpb": "16384", 
"bsz": "8", "num_updates": "10", "lr": "0", "gnorm": "5.796", "loss_scale": "1", "train_wall": "53", "gb_free": "9.3", "wall": "673"} +2021-03-08 12:41:04 | INFO | fairseq_cli.train | Stopping training due to num_updates: 10 >= max_update: 10 +2021-03-08 12:41:04 | INFO | fairseq_cli.train | begin validation on "valid" subset +2021-03-08 12:43:15 | INFO | valid | {"epoch": 1, "valid_loss": "17.953", "valid_ppl": "253807", "valid_wps": "1868.4", "valid_wpb": "15400.2", "valid_bsz": "7.6", "valid_num_updates": "10"} +2021-03-08 12:43:15 | INFO | fairseq_cli.train | end of epoch 1 (average epoch stats below) +2021-03-08 12:43:15 | INFO | train | {"epoch": 1, "train_loss": "19.351", "train_ppl": "668509", "train_wps": "210.9", "train_ups": "0.01", "train_wpb": "16384", "train_bsz": "8", "train_num_updates": "10", "train_lr": "0", "train_gnorm": "36.26", "train_loss_scale": "1", "train_train_wall": "667", "train_gb_free": "9.3", "train_wall": "804"} +2021-03-08 12:43:15 | INFO | fairseq_cli.train | done training in 798.6 seconds +``` + +
+ +``` +(...) +2021-03-08 18:04:09 | INFO | fairseq_cli.train | num. model params: 13,110,865,920 (num. trained: 13,110,865,920) +(...) +2021-03-08 18:04:09 | INFO | fairseq_cli.train | training on 8 devices (GPUs/TPUs) +2021-03-08 18:04:09 | INFO | fairseq_cli.train | max tokens per GPU = None and batch size per GPU = 8 +(...) +Adam Optimizer #0 is created with AVX2 arithmetic capability. +Config: alpha=0.000100, betas=(0.900000, 0.980000), weight_decay=0.000000, adam_w=1 +(...) +2021-03-08 18:05:06 | INFO | train_inner | {"epoch": 1, "update": 0.001, "loss": "16.408", "ppl": "86945.6", "wps": "0", "ups": "0", "wpb": "131072", "bsz": "64", "num_updates": "1", "lr": "2e-05", "gnorm": "18.27", "loss_scale": "4", "train_wall": "47", "gb_free": "9.3", "wall": "56"} +2021-03-08 18:05:45 | INFO | train_inner | {"epoch": 1, "update": 0.002, "loss": "16.352", "ppl": "83644.3", "wps": "3283.4", "ups": "0.03", "wpb": "131072", "bsz": "64", "num_updates": "2", "lr": "4e-05", "gnorm": "18.411", "loss_scale": "4", "train_wall": "40", "gb_free": "9.3", "wall": "96"} +2021-03-08 18:06:21 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 +2021-03-08 18:06:56 | INFO | fairseq.trainer | NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 +2021-03-08 18:07:37 | INFO | train_inner | {"epoch": 1, "update": 0.006, "loss": "23.682", "ppl": "1.34537e+07", "wps": "1176.6", "ups": "0.01", "wpb": "131072", "bsz": "64", "num_updates": "3", "lr": "6e-05", "gnorm": "119.682", "loss_scale": "1", "train_wall": "111", "gb_free": "9.3", "wall": "208"} +2021-03-08 18:08:18 | INFO | train_inner | {"epoch": 1, "update": 0.007, "loss": "18.988", "ppl": "519921", "wps": "3189.1", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "4", "lr": "8e-05", "gnorm": "14.934", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "249"} +2021-03-08 18:08:59 | INFO | train_inner | {"epoch": 1, "update": 0.008, "loss": "20.08", "ppl": "1.10798e+06", "wps": "3223.1", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "5", "lr": "0.0001", "gnorm": "59.92", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "289"} +2021-03-08 18:09:39 | INFO | train_inner | {"epoch": 1, "update": 0.009, "loss": "18.323", "ppl": "327980", "wps": "3256.6", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "6", "lr": "8e-05", "gnorm": "37.425", "loss_scale": "1", "train_wall": "40", "gb_free": "9.3", "wall": "330"} +2021-03-08 18:10:20 | INFO | train_inner | {"epoch": 1, "update": 0.01, "loss": "17.264", "ppl": "157354", "wps": "3188.7", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "7", "lr": "6e-05", "gnorm": "10.824", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "371"} +2021-03-08 18:11:01 | INFO | train_inner | {"epoch": 1, "update": 0.011, "loss": "16.794", "ppl": "113647", "wps": "3230", "ups": "0.02", "wpb": "131072", "bsz": "64", "num_updates": "8", "lr": "4e-05", "gnorm": "5.616", "loss_scale": "1", "train_wall": "41", "gb_free": "9.3", "wall": "411"} +2021-03-08 18:11:39 | INFO | train_inner | {"epoch": 1, "update": 0.012, "loss": "16.706", "ppl": "106938", "wps": "3384", "ups": "0.03", "wpb": "131072", "bsz": "64", "num_updates": "9", "lr": "2e-05", "gnorm": "5.318", "loss_scale": "1", "train_wall": "39", "gb_free": "9.3", "wall": "450"} +2021-03-08 18:12:19 | INFO | train_inner | {"epoch": 1, "update": 0.013, "loss": "16.548", "ppl": "95796.2", "wps": "3274.4", "ups": "0.02", 
"wpb": "131072", "bsz": "64", "num_updates": "10", "lr": "0", "gnorm": "5.22", "loss_scale": "1", "train_wall": "40", "gb_free": "9.3", "wall": "490"} +2021-03-08 18:12:19 | INFO | fairseq_cli.train | Stopping training due to num_updates: 10 >= max_update: 10 +2021-03-08 18:12:19 | INFO | fairseq_cli.train | begin validation on "valid" subset +2021-03-08 18:12:45 | INFO | valid | {"epoch": 1, "valid_loss": "16.624", "valid_ppl": "101000", "valid_wps": "10855.9", "valid_wpb": "123202", "valid_bsz": "60.5", "valid_num_updates": "10"} +2021-03-08 18:12:45 | INFO | fairseq_cli.train | end of epoch 1 (average epoch stats below) +2021-03-08 18:12:45 | INFO | train | {"epoch": 1, "train_loss": "18.114", "train_ppl": "283776", "train_wps": "2567.8", "train_ups": "0.02", "train_wpb": "131072", "train_bsz": "64", "train_num_updates": "10", "train_lr": "0", "train_gnorm": "29.562", "train_loss_scale": "1", "train_train_wall": "480", "train_gb_free": "9.3", "train_wall": "516"} +2021-03-08 18:12:45 | INFO | fairseq_cli.train | done training in 509.9 seconds +``` + +